import requests
import http.cookiejar as cookielib
import hmac
from hashlib import sha1
import re
import json
import time
# Use a single session to keep the connection alive and avoid logging in repeatedly.
session = requests.session()
# Store the session's cookies in an LWP-format cookie jar bound to cookies.txt.
session.cookies = cookielib.LWPCookieJar(filename='cookies.txt')

try:
    session.cookies.load(ignore_discard=True)
except (OSError, cookielib.LoadError):
    # A missing/unreadable cookies.txt raises OSError; a malformed file raises
    # LoadError. Anything else (KeyboardInterrupt, programming errors) is no
    # longer swallowed, unlike the previous bare ``except:``.
    print('无cookies可供加载')

# Spoofed browser request headers.
agent = (
    "Mozilla/5.0 (Windows NT 10.0; WOW64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/64.0.3282.119 Safari/537.36"
)
header = {
    "HOST": "www.zhihu.com",
    "Referer": "https://www.zhihu.com",
    "User-Agent": agent,
    "Connection": "keep-alive",
}


def get_xsrf():
    """
    Fetch the ``_xsrf`` token from Zhihu.

    The latest Zhihu login page no longer hides the xsrf value in the page
    source; it is stored in the response cookies instead.

    :return: value of the ``_xsrf`` cookie set on the signup page response
    """
    signup_response = requests.get('https://www.zhihu.com/signup', headers=header)
    return signup_response.cookies['_xsrf']


def get_signature(time_str):
    """
    Build the ``signature`` value for the sign-in request.

    The signature is an HMAC-SHA1 hex digest, keyed with a fixed secret, over
    the concatenation grant_type + client_id + source + timestamp.  The
    scheme was derived by reading the site's JavaScript, which contains the
    signature generation code:
    https://static.zhihu.com/heifetz/main.app.70053eec22abb06fcb69.js

    :param time_str: timestamp string appended to the signed message
    :return: hexadecimal HMAC-SHA1 digest
    """
    secret = 'd1b964811afb40118a12068ff74a12f4'.encode('utf-8')
    # Field order matters: grant_type, client_id, source, then the timestamp.
    message = ''.join((
        'password',
        'c3cef7c66a1843f8b3a9e6a1e3160e20',
        'com.zhihu.web',
        time_str,
    ))
    return hmac.new(secret, message.encode('utf-8'), sha1).hexdigest()


def is_login(username='18963970962', password='ksky0316'):
    """
    Check whether the current session is logged in, and log in if it is not.

    The inbox page is requested without following redirects; any status code
    other than 200 is treated as "not logged in" and triggers
    ``zhihu_login(username, password)``.

    :param username: account name passed to ``zhihu_login`` when a login is needed
    :param password: password passed to ``zhihu_login`` when a login is needed
    :return: None
    """
    # NOTE(security): the default credentials are kept only for backward
    # compatibility with existing no-argument calls; they should be moved out
    # of the source (environment variable / config file) and rotated.

    # Send the probe through the shared session so the cookies held in its
    # cookie jar are actually included; a bare requests.get() sends none of
    # them and therefore can never observe a saved login.
    response = session.get('https://www.zhihu.com/inbox', headers=header, allow_redirects=False)
    print(response.status_code)
    if response.status_code != 200:
        zhihu_login(username, password)
    else:
        print('已经模拟登录了')


def get_captchCode():
    """
    Ask Zhihu whether a captcha is required and, if so, request one.

    :return: ``''`` when the captcha endpoint reports that no captcha is
        needed (or the ``show_captcha`` field is absent from the response);
        otherwise the parsed body of the captcha PUT request is printed and
        ``None`` is returned.
    """
    response = requests.get('https://www.zhihu.com/api/v3/oauth/captcha?lang=en', headers=header)
    # Raw string: "\w" in a normal string literal is an invalid escape sequence.
    match_obj = re.findall(r'"show_captcha":(\w+)', response.text)
    print(match_obj)
    # The regex captures text, so the value must be compared with the JSON
    # literal 'false'; comparing a str with the bool False is never true.
    # An empty match list is treated as "no captcha" instead of raising IndexError.
    if not match_obj or match_obj[0] == 'false':
        return ''

    xsrf = get_xsrf()
    header.update({
        "authorization": "oauth c3cef7c66a1843f8b3a9e6a1e3160e20",  # fixed value
        # Same key casing as zhihu_login, so the shared header dict never
        # ends up holding two case-variant xsrf entries.
        "X-Xsrftoken": xsrf
    })
    response = requests.put('https://www.zhihu.com/api/v3/oauth/captcha?lang=cn', headers=header)
    show_captcha = json.loads(response.text)
    print(show_captcha)

def zhihu_login(username, password):
    """
    Sign in to Zhihu and persist the session cookies on success.

    :param username: value submitted as the ``username`` form field
    :param password: value submitted as the ``password`` form field
    :return: None; on HTTP 200 the session cookie jar is saved to its file,
        otherwise a failure message is printed
    """
    post_url = 'https://www.zhihu.com/api/v3/oauth/sign_in'
    xsrf = get_xsrf()
    header.update({
        "authorization": "oauth c3cef7c66a1843f8b3a9e6a1e3160e20",  # fixed value
        "X-Xsrftoken": xsrf
    })
    # Millisecond timestamp; the same string is sent and signed.
    time_str = str(int(time.time() * 1000))
    # Values written directly as string literals are fixed; they stay valid
    # as long as Zhihu does not change its anti-crawler measures.
    post_data = {
        "client_id": "c3cef7c66a1843f8b3a9e6a1e3160e20",
        "grant_type": "password",
        "timestamp": time_str,
        "source": "com.zhihu.web",
        "password": password,
        "username": username,
        # TODO: fill this from get_captchCode() when a captcha is required.
        "captcha": "",
        "lang": "en",
        "ref_source": "homepage",
        "utm_source": "",
        "signature": get_signature(time_str),
    }
    # Post through the shared session: the session sends its own cookies and
    # stores the cookies set by the response in session.cookies. With
    # requests.post(..., cookies=session.cookies) the jar is only read, so the
    # login cookies were never captured and save() persisted nothing new.
    response = session.post(post_url, data=post_data, headers=header)
    if response.status_code == 200:
        # Keep discardable (session-only) cookies too, matching the
        # load(ignore_discard=True) call used when the jar is restored.
        session.cookies.save(ignore_discard=True)
    else:
        print('登录失败')


# Script entry point: check the login state and log in when necessary.
is_login()



